CREATE OR REPLACE FUNCTION "pipeline"."pca_dense_sample"("source_table" text, "out_table_model" varchar, "out_table_result" varchar, "residual_table" varchar, "result_summary_table" varchar, "id_col" varchar, "feature_cols" varchar, "k" int4, "proportion" float8, "isK" int4)
  RETURNS "pg_catalog"."text" AS $BODY$
    import json
    import time
    
    def columnsToVecTable(sourceTable, featureCols, otherCols, outTable):
        featureLists = featureCols.split(",")
        tmpList = []
        for item in featureLists:
            tmpList.append('"{}"'.format(item))
        otherLists = []
        for item in otherCols.split(","):
            otherLists.append('"{}"'.format(item))        
        sqlStr = "SELECT madlib.cols2vec('{}', '{}', '{}', NULL, '{}')".format(sourceTable, outTable, ",".join(tmpList), ",".join(otherLists))
        msg = "success"
        try:
            rs = plpy.execute(sqlStr)
        except Exception as e:
            plpy.execute("DROP TABLE IF EXISTS {}".format(outTable))
            plpy.execute("DROP TABLE IF EXISTS {}_summary".format("outTable"))
            msg = "sql=[{}] errorMsg=[{}]".format(sqlStr, str(e))
            return False, msg
        return True, msg
        
    def pcaTrain(sourceTable, outTable, idCol, compentParam):
        sqlStr = "SELECT madlib.pca_train('{}', '{}', '{}', {})".format(sourceTable, outTable, idCol, compentParam)
        msg = "success"
        try:
            rs = plpy.execute(sqlStr)
        except Exception as e:
            plpy.execute("DROP TABLE IF EXISTS {}".format(outTable))
            msg = "sql=[{}] errorMsg=[{}]".format(sqlStr, str(e))
            return False, msg
        return True, msg
        
    def pcaProject(matTable, pcTable, outTable, idCol, residualTable, resultSummaryTable):
        sqlStr = "SELECT madlib.pca_project('{}', '{}', '{}', '{}', '{}', '{}')".format(matTable, pcTable, outTable, idCol, residualTable, resultSummaryTable)
        msg = "success"
        try:
            rs = plpy.execute(sqlStr)
        except Exception as e:
            plpy.execute("DROP TABLE IF EXISTS {}".format(outTable))
            plpy.execute("DROP TABLE IF EXISTS {}".format(residualTable))
            plpy.execute("DROP TABLE IF EXISTS {}".format(resultSummaryTable))
            plpy.execute("DROP TABLE IF EXISTS {}".format(pcTable))
            plpy.execute("DROP TABLE IF EXISTS {}".format(matTable))
            msg = "sql=[{}] errorMsg=[{}]".format(sqlStr, str(e))
            return False, msg
        return True, msg
    
    def cleanTable(tables):
        for table in tables:
            try:
                plpy.execute("DROP TABLE IF EXISTS {}".format(table))
            except Exception as e:
                continue
    
    def transResultTable(inputTable, outResultTable, k, idCol):
        selectBody = []
        for i in range(k):
            selectBody.append("row_vec[{}] as f{}".format(str(i + 1), str(i + 1)))
        sqlStr = "CREATE TABLE {} AS SELECT row_id as {},{} FROM {}".format(outResultTable, idCol, ",".join(selectBody), inputTable)
        try:
            plpy.execute(sqlStr)
        except Exception as e:
            plpy.execute("DROP TABLE IF EXISTS {}".format(outResultTable))
            msg = "{} sql={}".format(str(e), sqlStr)
            return False, msg
        return True, "success"
    
    def logError(msg, status, result):
        result["status"] = status
        result["error_msg"] = msg
        return json.dumps(result)
    
    def begin(source_table, out_table_model, out_table_result, residual_table, result_summary_table, id_col, feature_cols, k, proportion, isK):
        result = {
            "status": 0,
            "error_msg": "success",
            "result": {
                "input_params": {
                    "source_table": source_table,
                    "out_table_model": out_table_model,
                    "out_table_result": out_table_result,
                    "id_col": id_col,
                    "feature_cols": feature_cols,
                    "k": k,
                    "proportion": proportion,
                    "residual_table": residual_table,
                    "result_summary_table": result_summary_table,
                    "isK": isK
                },
                "output_params": [
                ]
            }
        }
        sourceTable = source_table
        if source_table.startswith("create view") or source_table.startswith("CREATE VIEW"):
            try:
                rs = plpy.execute(source_table)
                sourceTable = source_table.split(" ")[2]
            except Exception as e:
                plpy.execute("DROP VIEW IF EXISTS {}".format(sourceTable))
                msg = "sq={}, errorMsg={}".format(source_table, str(e))
                return logError(msg, 500, result)        
        matTable = "pipeline.tmp_vector_{}".format(int(time.time() * 10000))
        flag, msg = columnsToVecTable(sourceTable, feature_cols, id_col, matTable)
        if not flag:
            if source_table.lower().startswith("create view"):
                plpy.execute("DROP VIEW IF EXISTS {}".format(sourceTable))
                cleanTable([matTable])
            else:
                cleanTable([matTable])
            return logError(msg, 500, result)
        compentParam = k
        if isK != 1:
            compentParam = proportion
        flag, msg = pcaTrain(matTable, out_table_model, id_col, compentParam)
        if not flag:
            if source_table.lower().startswith("create view"):
                plpy.execute("DROP VIEW IF EXISTS {}".format(sourceTable))
                cleanTable([matTable, out_table_model])
            else:
                cleanTable([matTable, out_table_model])
            return logError(msg, 500, result)
        modelTable = {
            "out_table_name": out_table_model,
            "output_cols": ["row_id", "principal_components", "std_dev", "proportion"]
        }
        
        flag, msg = pcaProject(matTable, out_table_model, out_table_result + "_tmp", id_col, residual_table, result_summary_table)
        if not flag:
            if source_table.lower().startswith("create view"):
                plpy.execute("DROP VIEW IF EXISTS {}".format(sourceTable))
                cleanTable([matTable, out_table_model, out_table_result, out_table_result + "_tmp", residual_table, result_summary_table])
            else:
                cleanTable([matTable, out_table_model, out_table_result, out_table_result + "_tmp", residual_table, result_summary_table]) 
            return logError(msg, 500, result)
        flag, msg = transResultTable(out_table_result + "_tmp", out_table_result, k, id_col)
        if not flag:
            if source_table.lower().startswith("create view"):
                plpy.execute("DROP VIEW IF EXISTS {}".format(sourceTable))
                cleanTable([matTable, out_table_model, out_table_result, out_table_result + "_tmp", residual_table, result_summary_table])
            else:
                cleanTable([matTable, out_table_model, out_table_result, out_table_result + "_tmp", residual_table, result_summary_table])
            return logError(msg, 500, result)
        cleanTable([out_table_result + "_tmp", residual_table, result_summary_table, matTable])
        if source_table.lower().startswith("create view"):
            plpy.execute("DROP VIEW IF EXISTS {}".format(sourceTable))
        outputCols = [id_col]
        tmpCols = ["f{}".format(str(i + 1)) for i in range(k)]
        outputCols.extend(tmpCols)
        resultTable = {
            "out_table_name": out_table_result,
            "output_cols": outputCols
        }
        
        result["result"]["output_params"].append(resultTable)
        result["result"]["output_params"].append(modelTable)
        
        return json.dumps(result)
    if __name__ == "__main__":
        return begin(source_table, out_table_model, out_table_result, residual_table, result_summary_table, id_col, feature_cols, k, proportion, isK)
$BODY$
  LANGUAGE plpythonu VOLATILE
  COST 100